/****************************************************************************
 *
 * Copyright 2016 Samsung Electronics All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the License.
 *
 ****************************************************************************/
/*
 * Copyright (c) 2010, Swedish Institute of Computer Science
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/**
 * \file
 *      Lexical analyzer for AQL, the Antelope Query Language.
 * \author
 *      Nicolas Tsiftes <nvt@sics.se>
 */

/****************************************************************************
 * Included Files
 ****************************************************************************/
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "aql.h"

/****************************************************************************
 * Private Types
 ****************************************************************************/
/* Associates the textual form of a keyword or operator with its token. */
struct keyword {
	const char *string;			/* Lexeme text; refers to a read-only string literal. */
	token_t token;				/* Token reported when the lexeme matches. */
};

/* Table of all keywords and operators recognized by the lexer.
 *
 * The entries are arranged primarily by length and secondarily by
 * expected lookup frequency. The numeric comments give the array index
 * of the first entry of each length group; skip_hint[] hard-codes the
 * same indices, so the two tables must be kept in sync whenever an
 * entry is added, removed, or moved to another group.
 */
static const struct keyword keywords[] = {
	/* 0: first 1-character entry */
	{";", END},
	{"(", LEFT_PAREN},
	{")", RIGHT_PAREN},
	{",", COMMA},
	{"=", EQUAL},
	{">", GT},
	{"<", LT},
	{".", DOT},
	{"+", ADD},
	{"-", SUB},
	{"*", MUL},
	{"/", DIV},
	{"#", COMMENT},

	{">=", GEQ},				/* 13: first 2-character entry */
	{"<=", LEQ},
	{"<>", NOT_EQUAL},
	{"<-", ASSIGN},
	{"OR", OR},
	{"IS", IS},
	{"ON", ON},
	{"IN", IN},

	{"ALL", ALL},				/* 21: first 3-character entry */
	{"AND", AND},
	{"NOT", NOT},
	{"SUM", SUM},
	{"MAX", MAX},
	{"MIN", MIN},
	{"INT", INT},

	{"INTO", INTO},				/* 28: first 4-character entry */
	{"FROM", FROM},
	{"MEAN", MEAN},
	{"JOIN", JOIN},
	{"LONG", LONG},
	{"TYPE", TYPE},

	{"WHERE", WHERE},			/* 34: first 5-character entry */
	{"COUNT", COUNT},
	{"INDEX", INDEX},

	{"INSERT", INSERT},			/* 37: first 6-character entry */
	{"SELECT", SELECT},
	{"REMOVE", REMOVE},
	{"CREATE", CREATE},
	{"MEDIAN", MEDIAN},
	{"DOMAIN", DOMAIN},
	{"STRING", STRING},
	{"INLINE", INLINE},
	{"REMAIN", REMAIN},

	{"PROJECT", PROJECT},		/* 46: first 7-character entry */

	{"RELATION", RELATION},		/* 47: first 8-character entry */

	{"ATTRIBUTE", ATTRIBUTE},	/* 48: first 9-character entry */
	{"BPLUSTREE", BPLUSTREE}
};

/* skip_hint[n - 1] is the index in keywords[] of the first entry whose
 * text is n characters long. The number of elements therefore equals the
 * length of the longest keyword, and the values must be updated whenever
 * the layout of keywords[] changes.
 */
static const int8_t skip_hint[] = { 0, 13, 21, 28, 34, 37, 46, 47, 48 };

/* Characters that terminate an identifier or keyword. The set is only
 * ever read, so it is const-qualified. */
static const char separators[] = "#.;,() \t\n";

/****************************************************************************
* Private Functions
****************************************************************************/
/*
 * Look up the token that corresponds to a lexeme.
 *
 * string: start of the lexeme (does not need to be NUL-terminated at
 *         string[length]).
 * length: number of characters that make up the lexeme.
 *
 * Only the keywords[] entries whose text has exactly the requested length
 * are examined; skip_hint[] supplies the bounds of that group. The
 * comparison ignores case.
 *
 * Returns the matching token, or NONE when the lexeme is empty, longer
 * than every keyword, or not present in the table.
 */
static token_t get_token_id(const char *string, const size_t length)
{
	const size_t longest = sizeof(skip_hint);
	int first;
	int limit;
	int idx;

	if (length < 1 || length > longest) {
		return NONE;
	}

	first = skip_hint[length - 1];

	/* The last length group extends to the end of the keyword table. */
	limit = (length == longest) ? (int)(sizeof(keywords) / sizeof(keywords[0])) : skip_hint[length];

	idx = first;
	while (idx < limit) {
		if (strncasecmp(keywords[idx].string, string, length) == 0) {
			return keywords[idx].token;
		}
		idx++;
	}

	return NONE;
}

/*
 * Lex a numeric literal that starts at s.
 *
 * The text is first parsed as a base-10 integer. When DB_FEATURE_FLOATS
 * is enabled and the integer part is followed by '.', the text is parsed
 * again as a float instead.
 *
 * On success the parsed value is copied into *lexer->value, *lexer->token
 * is set to INTEGER_VALUE or FLOAT_VALUE, lexer->input is advanced past
 * the literal, and 1 is returned. Returns -1 when no number could be
 * parsed or when an integer literal does not fit in a long.
 */
static int next_real(lexer_t *lexer, const char *s)
{
	char *end;
	long long_value;
#if DB_FEATURE_FLOATS
	float float_value;
#endif							/* DB_FEATURE_FLOATS */

	errno = 0;
	long_value = strtol(s, &end, 10);

#if DB_FEATURE_FLOATS
	if (*end == '.') {
		/* Process a float value. */
		float_value = strtof(s, &end);
		if (float_value == 0 && s == end) {
			return -1;
		}
		memcpy(lexer->value, &float_value, sizeof(float_value));
		*lexer->token = FLOAT_VALUE;
		lexer->input = end;

		return 1;
	}
#endif							/* DB_FEATURE_FLOATS */

	/* Process an integer value. On overflow strtol() returns LONG_MAX or
	   LONG_MIN (never 0) and sets errno to ERANGE, so errno has to be
	   checked regardless of the returned value. */
	if (end == s || errno != 0) {
		return -1;
	}
	memcpy(lexer->value, &long_value, sizeof(long_value));
	*lexer->token = INTEGER_VALUE;
	lexer->input = end;

	return 1;
}

/*
 * Lex a string literal.
 *
 * s points to the first character after the opening single quote. The
 * literal extends to the next single quote; there is no escape mechanism.
 *
 * On success *lexer->token is set to STRING_VALUE, lexer->input is moved
 * past the closing quote, the NUL-terminated text is stored in
 * *lexer->value, and 1 is returned. Text that does not fit in the value
 * buffer is truncated rather than written past its end. Returns -1 when
 * the closing quote is missing.
 */
static int next_string(lexer_t *lexer, const char *s)
{
	/* Room for the text itself; one byte is reserved for the terminator. */
	const size_t max_length = sizeof(*lexer->value) - 1;
	const char *end;
	size_t length;

	end = strchr(s, '\'');
	if (end == NULL) {
		return -1;
	}

	length = (size_t)(end - s);
	*lexer->token = STRING_VALUE;
	lexer->input = end + 1;		/* Skip the closing delimiter. */

	/* The length comes straight from the query text, so it must be
	   bounded before copying into the fixed-size value buffer. */
	if (length > max_length) {
		length = max_length;
	}

	memcpy(lexer->value, s, length);
	(*lexer->value)[length] = '\0';

	return 1;
}

/*
 * Lex a keyword, operator, or identifier that starts at s.
 *
 * The lexeme runs up to the next separator character. If s itself is a
 * separator, the lexeme is that single character. A lexeme found in the
 * keyword table yields the corresponding token; anything else is reported
 * as IDENTIFIER and its NUL-terminated text is stored in *lexer->value.
 * Identifier text that does not fit in the value buffer is truncated
 * rather than written past its end.
 *
 * lexer->input is advanced past the whole lexeme in either case.
 * Always returns 1.
 */
static int next_token(lexer_t *lexer, const char *s)
{
	/* Room for the text itself; one byte is reserved for the terminator. */
	const size_t max_length = sizeof(*lexer->value) - 1;
	size_t length;

	length = strcspn(s, separators);
	if (length == 0) {
		/* We encountered a separator, so we try to get a token of
		   precisely 1 byte. */
		length = 1;
	}

	*lexer->token = get_token_id(s, length);
	lexer->input = s + length;
	if (*lexer->token != NONE) {
		return 1;
	}

	/* The input did not constitute a valid token,
	   so we regard it as an identifier. */

	*lexer->token = IDENTIFIER;

	/* The length comes straight from the query text, so it must be
	   bounded before copying into the fixed-size value buffer. */
	if (length > max_length) {
		length = max_length;
	}

	memcpy(lexer->value, s, length);
	(*lexer->value)[length] = '\0';

	return 1;
}

/*
 * Read the next token from the lexer's input.
 *
 * Leading spaces, tabs, and newlines are skipped. The position of the
 * token is remembered in lexer->prev_pos so that lexer_rewind() can undo
 * the read. *lexer->token is reset to NONE before lexing starts.
 *
 * Returns 1 when a token was produced, 0 when the end of the input was
 * reached, and -1 when the input could not be lexed (an unterminated
 * string or an invalid number).
 */
int lexer_next(lexer_t *lexer)
{
	const char *s;

	*lexer->token = NONE;
	s = lexer->input;
	s += strspn(s, " \t\n");

	lexer->prev_pos = s;

	switch (*s) {
	case '\'':
		/* Process the string that follows the delimiter. */
		return next_string(lexer, s + 1);
	case '\0':
		return 0;
	default:
		/* The <ctype.h> functions require a value representable as
		   unsigned char; a plain char may be negative. */
		if (isdigit((unsigned char)*s) || (*s == '-' && isdigit((unsigned char)s[1]))) {
			return next_real(lexer, s);
		}

		/* Process a token. */
		return next_token(lexer, s);
	}
}

/*
 * Undo the most recent lexer_next() call by moving the read position
 * back to the start of the token that call examined.
 */
void lexer_rewind(lexer_t *lexer)
{
	const char *token_start = lexer->prev_pos;

	lexer->input = token_start;
}
